/*** MERGES TEST AND BASE FILES AND THEN ADDS STUDENT DEMOGRAPHICS ***/

* start from an empty session (data, and anything else -clear all- removes)
clear all
* NOTE(review): legacy numeric form of -set more-; presumably intended to stop output from pausing -- confirm
set more 1

* paths.do is not visible here; presumably it defines the globals used below (LOGFILES, WORKING, indata) -- confirm
do "paths.do"

* close any log that is still open (capture: no error if none is), then start this script's log
capture log close
log using "$LOGFILES\tracking_3.log", replace


**********************************************
*** MERGE BASE DATASET WITH MATH TEST DATA ***
**********************************************

* the tmp_ files below are written with relative names, so they land in this directory
cd "$WORKING"

** Build four lookup files from tracking_1, one per (timing, id variable) pair:
**   tmp_id1_0, tmp_id2_0 : used to merge in current-year scores
**   tmp_id1_1, tmp_id2_1 : used to merge in prior-year scores (year moved forward by one)
** Every file keeps only records with in_peims_flag not equal to 0 and a non-empty value
** of the id variable it is named after.
forvalues lag = 0/1 {
	foreach idvar in id1 id2 {
		use tracking_1, clear
		drop if in_peims_flag==0
		if `lag'==1 {
			* a record from year t supplies the prior-year values for year t+1
			replace year = year + 1
			* blank out id1 where it should not be used to match across years
			replace id1 = "" if invalid_id1_flag==1
		}
		drop if `idvar'==""
		drop invalid_id* in_peims_flag
		save tmp_`idvar'_`lag', replace
	}
}

** PROGRAM TO READ IN PRIOR AND CURRENT YEAR SCORES
* first argument is whether to read current-year (0) or prior-year (1) scores

program define getscores
	* Usage: getscores 1   prior-year scores  (test variables are renamed from M... to LM...)
	*        getscores 0   current-year scores (test variables keep their M... names)
	* Links tracking_2 to the tmp_id2_ and tmp_id1_ lookup files in two separate passes,
	* combines the two results, blanks test variables where the passes disagree, and
	* saves the result as tracking_3_A in the temp folder.
	* The 0 run merges onto the tracking_3_A file left by the 1 run, so run 1 first.
	args lag

	* ---- pass 1: link on id2 ----
	use tracking_2, clear
	* park the base id1 under another name so the lookup file's id1 can come in
	rename id1 ID1
	merge m:1 id2 year using "$WORKING\tmp_id2_`lag'", update
	drop if _merge==2
	drop _merge
	rename id1 id1_test
	* for prior-year links, ignore the lookup id1 where id1 should not be linked across years
	if `lag'==1 {
		replace id1_test = "" if invalid_id1_flag==1
	}
	rename ID1 id1
	save tmp`lag'.dta, replace

	di "HERE 63"

	* ---- pass 2: link on id1 ----
	use tracking_2, clear
	rename id2 ID2
	* keep a copy of id1: it is blanked for the merge below and restored afterwards
	gen holdID1 = id1
	if `lag'==1 {
		replace id1 = "" if invalid_id1_flag==1
	}
	merge m:1 id1 year using "$WORKING\tmp_id1_`lag'", update
	drop if _merge==2
	drop _merge
	rename id2 id2_test
	rename ID2 id2
	replace id1 = holdID1
	drop holdID1

	di "HERE 81"

	* ---- combine the two passes (pass 2 in memory, pass 1 from disk) ----
	merge 1:1 id1 id2 campus year using "$WORKING\tmp`lag'", update

	* tmp1, tmp2 = 1 when the base id and the id carried in from the lookup file differ
	* (defined only when both are non-empty)
	gen tmp1 = (id1!=id1_test) if id1!="" & id1_test!=""
	gen tmp2 = (id2!=id2_test) if id2!="" & id2_test!=""
	bysort _merge: tab tmp1 tmp2, m
	* with -update-, _merge 4 = missing value filled in from the using file,
	* _merge 5 = the two files hold conflicting nonmissing values;
	* blank the test variables for filled-in values with an id1 conflict and for all conflicts
	foreach v of varlist M* {
		replace `v' = . if (_merge==4 & tmp1==1) | _merge==5
	}
	drop id*_test* _merge tmp*

	* share of records with a non-missing score, by grade
	sum M*
	gen tmp = (Mscore!=.)
	tabstat tmp, by(grade)
	drop tmp*

	* prior-year run: move the test variables to the LM prefix and label them
	if `lag'==1 {
		rename M* LM*
		quietly {
			label variable LMdist "prior yr district from test files"
			label variable LMcamp "prior yr campus from test files"
			label variable LMscore "prior yr math score"
			label variable LMmonth "prior yr math score month"
			label variable LMgrade "prior yr math score grade"
			label variable LMcdabs "prior yr absent for test"
			label variable LMcdother "prior yr other - O"
			label variable LMcdlep "prior yr LEP exempt"
			label variable LMcdsped "prior yr spec ed exempt"
			label variable LMcdpass "prior yr prior pass exempt"
			label variable LMcdninfd "prior yr math no info - D"
			label variable LMcdninfs "prior yr math no info - *"
			label variable LMcdastan "prior yr alternative standard"
			label variable LMsched "prior yr score not on schedule"
		}
	}

	di "HERE 119"

	* current-year run: add the prior-year file already on disk; every record must match
	if `lag'==0 {
		merge 1:1 id1 id2 campus year using "$WORKING\temp\tracking_3_A", update
		assert _merge==3
		drop _merge
	}

	save "$WORKING\temp\tracking_3_A", replace

end

** RUNNING PROGRAM
* prior-year run first: the current-year run merges onto the file it leaves behind

getscores 1
getscores 0


*************************************************
*** READ IN AND PREPARE DEMOGRAPHIC VARIABLES ***
*************************************************

/*** Variables used in this section:
sex: F-female, M-male
ethnic: H-hispanic, B-black, I-American Indian or Alaska Native,
 W-white, A-asian, P-Native Hawaiian or other Pacific Islander,
 T-two or more races, N-no information
age: string variable
Binary indicators (0,1): lep disadv risk title1 speced bil_pgm esl gifted voced
***/

* Stack the yearly attendance/demographic files. year is created for the first file and
* filled in for each later file right after it is appended (appended rows arrive with year missing).
use "$indata\p_attend_demog11.dta", clear
gen year=2011
forvalues yy = 12/13 {
	append using "$indata\p_attend_demog`yy'.dta"
	replace year = 20`yy' if year==.
}
* the two flag variables are uppercase in the files stacked so far and lowercase in the
* later ones; lowercase them here so the later files append into the same variables
* (case-change form of -rename- takes the bare option -lower-, with no argument)
rename INVALID_ID1_FLAG STATE_ASSIGNED_FLAG, lower
forvalues yy = 14/19 {
	append using "$indata\p_attend_demog`yy'.dta"
	replace year = 20`yy' if year==.
}

rename *, lower
assert id1!=""

** gender: recode the F/M string into a numeric 0/1 male indicator
assert sex=="F"|sex=="M"
replace sex="0" if sex=="F"
replace sex="1" if sex=="M"
destring sex, replace
rename sex Dmale

** race: cross-check ethnic against the white and asian variables, then build indicators
foreach v of varlist white asian {
	tab ethnic `v', m
}
tab ethnic, m
assert ethnic!=""
gen Dhisp=(ethnic=="H")
gen Dblack=(ethnic=="B")
gen Dwhite=(ethnic=="W")
gen Dasian=(ethnic=="A")
* everything that is not H, B, W or A
gen Dother=(Dhisp==0&Dblack==0&Dwhite==0&Dasian==0)
drop white asian ethnic

** age
gen Dage=sept1_age
destring Dage, replace
summ Dage, d
drop sept1_age

** program variables
rename lep_attend Plep
rename economic Pdisadv
rename title1_flag Ptitle1
rename gifted_attend Pgifted
rename ve_attend Pvoced
* economic disadvantage codes: 00=not identified as disadvantaged, 01=eligible for free meals,
* 02=eligible for reduced-price meals, 99=other economic disadvantage (TANF, etc.)
* every value other than "00" is recoded to 1
* NOTE(review): an empty string would also be recoded to 1 -- check the tabulation in the log
tab Pdisadv, m
replace Pdisadv="1" if Pdisadv!="00"
destring Pdisadv, replace
* the remaining program flags must already be "0"/"1" strings; convert them to numeric
foreach v in lep title1 gifted voced {
	assert (P`v'=="0"|P`v'=="1")
	destring P`v', replace
}

** collapse to one row per student and year (means over that student's entries)
collapse (mean) D* P*, by (id1 year)
* note: can identify discrepancies across entries by fractional values for the means
foreach v of varlist D* P* {
	tab `v', m
}
la var Dmale "male"
la var Dhisp "hispanic"
la var Dblack "black"
la var Dwhite "white"
la var Dasian "asian"
la var Dother "other race/ethnicity"
la var Dage "age as of Sept"
la var Plep "limited English proficient"
la var Pdisadv "economically disadvantaged"
la var Ptitle1 "Title I student"
la var Pgifted "in gifted/talented program"
la var Pvoced "in vocational education"
* tabulate the sum of the five race/ethnicity means as a check
gen tmp=Dhisp+Dblack+Dwhite+Dasian+Dother
tab tmp, m
drop tmp

save "$WORKING\temp\tracking_3_B", replace


*****************************
*** MERGE TO BASE DATASET ***
*****************************

* attach the student-by-year demographic means (3_B) to the base records (3_A)
use "$WORKING\temp\tracking_3_B.dta", clear
merge 1:m id1 year using "$WORKING\temp\tracking_3_A.dta"
* every base record must have found a demographic row
assert _merge!=2
* given the assert, the only rows left to discard are demographic rows with no base record
drop if _merge==1
drop _merge

** add indicator for missing current-year test score
gen Mmiss = (Mscore==.)
label variable Mmiss "missing current test score"
bysort year: tabstat Mmiss if rctype==1, by(grade)

save "$WORKING\temp\tracking_3_C.dta", replace

********************************************************
*** READ IN Enrollment Data to get Bilingual and ESL ***
********************************************************
/*** Variables used in this section:
BIL_PGM: 0-NOT APPLICABLE, 2-TRANSITIONAL BILING/EARLY EXIT, 
 3-TRANSITIONAL BILING/LATE EXIT,
 4-DUAL LANG IMMERSION/TWO-WAY, 5-DUAL LANG IMMERSION/ONE-WAY
ESL_PGM: 0-NOT APPLICABLE, 2-ENGLISH AS A SECOND LANGUAGE/CONTENT-BASED,
 3-ENGLISH AS A SECOND LANGUAGE/PULL-OUT
***/
 
* stack the yearly enrollment files, filling in year for each file as it is appended
use "$indata\p_enroll_demog11f.dta", clear
gen year=2011
forvalues yy = 12/19 {
	append using "$indata\p_enroll_demog`yy'f.dta"
	replace year = 20`yy' if year==.
}

rename *, lower
assert id1!=""

** bilingual program: two-way dual language (code 4) versus the other programs (codes 2, 3, 5)
destring bil_pgm, replace
gen Pbil_non2way = inlist(bil_pgm, 2, 3, 5)
gen Pbil_2way = (bil_pgm==4)

** ESL program: content-based (code 2) versus pull-out (code 3)
destring esl_pgm, replace
gen Pesl_con = (esl_pgm==2)
gen Pesl_pull = (esl_pgm==3)

* a missing program code matches none of the codes above, so all four indicators are 0 for it

** collapse to one row per student and year; a fractional mean signals conflicting entries
collapse (mean) P*, by (id1 year)
label variable Pbil_non2way "in a non-twoway bilingual program"
label variable Pbil_2way "in a twoway bilingual program"
label variable Pesl_con "in a Content-based ESL program"
label variable Pesl_pull "in a Pull-out ESL program"

save "$WORKING\temp\tracking_3_D", replace

*****************************
*** MERGE TO BASE DATASET ***
*****************************
* attach the bilingual/ESL indicators (3_D) to the base records (3_C)
use "$WORKING\temp\tracking_3_D.dta", clear
merge 1:m id1 year using "$WORKING\temp\tracking_3_C.dta"
* discard indicator rows that have no base record
drop if _merge==1
* base records with no enrollment row get 0 for all four indicators
foreach v of varlist Pesl_con Pesl_pull Pbil_non2way Pbil_2way {
	replace `v' = 0 if _merge==2
}
drop _merge

save "$WORKING\temp\tracking_3_E", replace

*********************************************
*** READ IN SPECED Data to get DISABILITY ***
*********************************************
/*** Variables used in this section:
DISABILITY1: 0-NO DISABILITY, 1-ORTHOPEDIC IMPAIRMENT, 
 2-OTHER HEALTH IMPAIRMENT 3-AUDITORY IMPAIRMENT, 4-VISUAL IMPAIRMENT, 
 5-DEAF/BLIND, 6-MENTAL RETARDATION, 7-EMOTIONAL DISTURBANCE
 8-LEARNING DISABILITY, 9-SPEECH IMPAIRMENT, 10-AUTISM
 12-DEVELOPMENTAL DELAY, 13-TRAUMATIC BRAIN INJURY, 
 14-NONCATEGORICAL EARLY CHILDHOOD
setting: 0-NO INSTRUCT SETTING-USUAL 3&>, 1-HOMEBOUND-USUAL 3&>,
 2-HOSPITAL CLASS-USUAL 3&>, 3-UNKNOWN, 8-VOC ADJUST CLS/PGM-USUAL 3&>,
 13-IN-SCHOOL SUSPENSION-STAFF, 15-SCHL-COMM GUID CNTR-STAFF
 16-ALT ED PGM-DISCIP-STAFF, 18-TRAINING STATION
 19-TELEVISED INSTRUCT-STAFF, 30-ST SCHL-MNTL RET-USUAL 3&>
 31-HOME-BASED INSTRUCT-ECI 0-2, 32-CENT-BASED INSTRUCT-ECI 0-2
 34-OTHER ENVIRONMENT-ECI 0-2, 40-MAINSTREAM-USUAL 3&>
 41-RESRCE ROOM/SERV-LESS THAN 21%, 42-RESRCE ROOM/SERV-=>21% & <50%
 43-SELFCONTMILD/MOD/SEVREG50-60, 44-SELFCONTMILD/MOD/SEVREG>60%
 45-FULL-TIME ECH SE SET (AGE 3-5), 50-RESIDNTL NONPUB SCHL-USUAL 3&>, 
 60-NONPUB DAY SCHOOL-USUAL 3&>, 70-TX SCHL FOR BLIND-USUAL 3&>
 71-TX SCHL FOR DEAF-USUAL 3&>, 75-NON-DISCIP ALT ED-STAFF
 80-REGULAR CLASS-STAFF, 81-RES CARE/TRT FAC-MAIN
 82-RES CARE/TRT FAC-RESRC-<21%, 83-RES CARE/TRT FAC-RESRC-21-49%
 84-RES CARE/TRT FAC-CONT-50-60%, 85-RES CARE/TRT FAC-CONT->60%
 86-RES CARE/TRT FAC-SEPARATE CAMP, 87-RES CARE/TRT FAC-COMM CLASS
 88-RES CARE/TRT FAC-VOC ADJUSTMNT, 89-RES CARE/TRT FAC-FULL ECSE 3-5
 91-OFF HOME CAMPUS-MAINSTREAM, 92-OFF HOME CAMPUS-RESRC-<21%
 93-OFF HOME CAMPUS-RESRC-21-49%, 94-OFF HOME CAMPUS-CONT-50-60%
 95-OFF HOME CAMPUS-CONT->60%, 96-OFF HOME CAMPUS-SEPARATE CAMP
 97-OFF HOME CAMPUS-COMM CLASS, 98-OFF HOME CAMPUS-FULL ECSE 3-5
***/

* stack the yearly special-education files, filling in year for each file as it is appended
use "$indata\p_speced_student11f.dta", clear
gen year=2011
forvalues yy = 12/19 {
	append using "$indata\p_speced_student`yy'f.dta"
	replace year = 20`yy' if year==.
}

rename *, lower
assert id1!=""

** disability indicators built from disability1 (codes are listed in the section header)
destring disability1, replace
* Pdis_phy: codes 1, 3, 4, 6, 13
gen Pdis_phy = inlist(disability1, 1, 3, 4, 6, 13)
* Pdis_mal and Pdis_spe are created but never switched on:
* codes 7, 8 and 9 are counted under Pdis_oth below, so both stay 0 for every record
gen Pdis_mal = 0
gen Pdis_spe = 0
* Pdis_oth: codes 2, 7, 8, 9, 10, 12, 14
gen Pdis_oth = inlist(disability1, 2, 7, 8, 9, 10, 12, 14)
* NOTE(review): code 5 (deaf/blind) is not assigned to any of the four indicators -- confirm this is intended

** restrictive placement indicator from setting
destring setting, replace
gen Pspedr = inlist(setting, 1, 2, 8, 30, 50, 60, 43, 44) | inrange(setting, 84, 88) | inrange(setting, 94, 97)

* a missing disability1 or setting matches none of the codes above, so all five indicators are 0 for it

** collapse to one row per student and year; a fractional mean signals conflicting entries
collapse (mean) P*, by (id1 year)
label variable Pdis_phy "Physical and Severe Disability"
label variable Pdis_mal "Malleable Disability"
label variable Pdis_spe "Speech Disability"
label variable Pdis_oth "Other Disability"
label variable Pspedr "student served in restrictive setting"

save "$WORKING\temp\tracking_3_F", replace

*****************************
*** MERGE TO BASE DATASET ***
*****************************
* attach the special-education indicators (3_F) to the base records (3_E)
use "$WORKING\temp\tracking_3_F.dta", clear
merge 1:m id1 year using "$WORKING\temp\tracking_3_E.dta"
* discard indicator rows that have no base record
drop if _merge==1
* base records with no special-education row get 0 for all five indicators
foreach v of varlist Pdis_phy Pdis_mal Pdis_spe Pdis_oth Pspedr {
	replace `v' = 0 if _merge==2
}
drop _merge

save "$WORKING\temp\tracking_3_G", replace

************************************************
*** ADD CAMPUS-GRADE LEVEL AVERAGES ***
************************************************

use "$WORKING\temp\tracking_3_G.dta", clear

* checks before averaging: campus is never empty, and each combination of
* id1, campus, grade and year appears only once (every row is tagged)
assert campus!=""
egen tmp=tag(id1 campus grade year), missing
assert tmp==1

** students with missing grades are grouped together for these averages
collapse (mean) D* P*, by(campus grade year)
* relabel each mean and move it to the CG prefix (for example Dmale becomes CGmale)
foreach v in male hisp black white asian other age {
	label variable D`v' "`v' avg campus*grade"
	rename D`v' CG`v'
}
foreach v in lep disadv title1 gifted voced spedr delinqu ///
	bil_non2way bil_2way esl_con esl_pull dis_phy dis_mal dis_spe dis_oth {
	label variable P`v' "`v' avg campus*grade"
	rename P`v' CG`v'
}

save "$WORKING\temp\tracking_3_H.dta",replace

* the averages just saved are still in memory; attach them to every student record
merge 1:m campus grade year using "$WORKING\temp\tracking_3_G.dta", update
assert _merge==3
drop _merge

order id1 id2 invalid_id1_flag state_assig campus grade year D* P* M* LM* ///
grade_lo grade_hi att* sixweek* num_att num_trns rctype* ///
campname C* CG*
descr
summ
save "$WORKING\tracking_3.dta", replace

* delete the six scratch files written to the working folder earlier in this script
* (the tracking_3_A through tracking_3_H files in the temp folder are left in place)
erase "$WORKING\tmp_id1_0.dta"
erase "$WORKING\tmp_id2_0.dta"
erase "$WORKING\tmp_id1_1.dta"
erase "$WORKING\tmp_id2_1.dta"
erase "$WORKING\tmp0.dta"
erase "$WORKING\tmp1.dta"

log close
